from sklearn_benchmarks.reporting.hpo import HPOReporting
import pandas as pd
# Lift pandas' display limits so that wide or long frames are shown in full.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

# Build the HPO report from the benchmark configuration file and run it.
reporting_hpo = HPOReporting(config="config.yml")
reporting_hpo.run()
import numpy as np
import matplotlib.pyplot as plt
from sklearn_benchmarks.utils.misc import find_nearest
# Smoothed curves: validation score against mean grid fit time, one line per
# entry of `reporting_hpo.data`.
fit_times_for_max_scores = []
plt.figure(figsize=(15, 10))
for hpo_result in reporting_hpo.data:
    grid_times = hpo_result.mean_grid_times
    grid_scores = hpo_result.grid_scores

    plt.plot(
        grid_times,
        grid_scores,
        c=f"tab:{hpo_result.color}",
        label=hpo_result.legend,
    )

    # Record the fit time at which this curve reaches its best score.
    idx_max_score = np.argmax(grid_scores, axis=0)
    fit_time_for_max_score = grid_times[idx_max_score]
    fit_times_for_max_scores.append(fit_time_for_max_score)

# Crop the x-axis at the smallest of those fit times, i.e. where the first
# curve reaches its maximum score.
min_fit_time_all_constant = min(fit_times_for_max_scores)
plt.xlim(right=min_fit_time_all_constant)

plt.xlabel("Cumulated fit times in s")
plt.ylabel("Validation scores")
plt.legend()
plt.show()
# Speedup barplots: for each target score, compare the fit time at which every
# result gets closest to that score against the scikit-learn baseline.
thresholds = [0.74, 0.76, 0.78]

# squeeze=False keeps `axes` an array even when there is a single threshold,
# so the zip() below works for any number of thresholds.
_, axes = plt.subplots(len(thresholds), figsize=(12, 20), squeeze=False)
axes = axes.ravel()

# The baseline is the first result whose library is scikit-learn.
base_hpo_result = next(
    (result for result in reporting_hpo.data if result.lib == "sklearn"),
    None,
)
if base_hpo_result is None:
    raise ValueError(
        "No result with lib == 'sklearn' found in reporting_hpo.data; "
        "cannot compute speedups without a baseline."
    )

# The baseline series do not depend on the threshold: read them once.
base_scores = base_hpo_result.scores
base_fit_times = base_hpo_result.fit_times

for ax, threshold in zip(axes, thresholds):
    # find_nearest presumably returns (position, value) of the score closest
    # to `threshold`; only the position is used, as a positional index.
    base_idx_closest, _ = find_nearest(base_scores, threshold)
    base_time = base_fit_times.iloc[base_idx_closest]

    # Collect the rows in a list and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for hpo_result in reporting_hpo.data:
        idx_closest, _ = find_nearest(hpo_result.scores, threshold)
        lib_time = hpo_result.fit_times.iloc[idx_closest]
        # speedup > 1 means this result was faster than the baseline.
        speedup = base_time / lib_time
        row = dict(
            speedup=speedup,
            legend=hpo_result.legend,
            color=hpo_result.color,
        )
        rows.append(row)
    df_threshold = pd.DataFrame(rows, columns=["speedup", "legend", "color"])

    # NOTE(review): colors are passed as-is here, while the line plot uses the
    # "tab:" palette prefix -- confirm whether the shades are meant to differ.
    ax.bar(
        x=df_threshold["legend"],
        height=df_threshold["speedup"],
        width=0.3,
        color=df_threshold["color"],
    )
    ax.set_xlabel("Library")
    ax.set_ylabel("Speedup")
    ax.set_title(f"At score {threshold}")

plt.tight_layout()
plt.show()